# input
tse_file <- here::here("data", "processed", "tse_cleaned.rds")
igc_file <- here::here("data", "raw", "dataTable.rds")
# output
out_dir <- here::here("data", "02_alpha_diversity")
dir.create(out_dir, showWarnings = FALSE, recursive = TRUE)
# confounders
confounders <- c(
"treatment",
"time_point",
"gender",
"risk_group",
"record_id",
"age",
"HIV_VL"
)
# formula
mod_formula <- glue::glue(
"log_ratio ~ treatment * time_point + gender + scale(age) + risk_group + ",
"log1p(HIV_VL) + (1|record_id)"
)02 - Alpha Diversity Analysis
1 Define the input and output paths
2 Set-up
# load libraries
library(magrittr)
library(patchwork)
library(tidyplots)
suppressPackageStartupMessages(library(tidySingleCellExperiment))3 Computate Alpha Diversity Metrics
3.1 Load TreeSE
# load
tse <- readr::read_rds(tse_file)3.2 Add Gene Richness
# load
igc_df <- readr::read_rds(igc_file) %>% tibble::as_tibble()
# threshold
threshold <-
igc_df %>%
dplyr::group_by(SampleID) %>%
dplyr::summarise(NumberMappedReads = max(NumberMappedReads)) %>%
dplyr::summarise(q = quantile(NumberMappedReads, 0.02)) %>%
dplyr::pull()
# extract
igc_df <-
igc_df %>%
dplyr::filter(ReadCountReal >= threshold) %>%
dplyr::group_by(SampleID) %>%
dplyr::summarise(gene_richness = min(GeneNumber))
# merge
tse <- tse %>% dplyr::left_join(igc_df, by = "SampleID")Loading required namespace: TreeSummarizedExperiment
# transform
tse <- mia::transformAssay(tse, method = "relab")3.3 Compute Alpha Diversity Metrics
# select metrics
metrics <- c(
"shannon_diversity", "gini_dominance","observed_richness", "pielou_evenness"
)
# compute
tse <-
tse %>%
mia::addAlpha(
index = metrics,
sample = quantile(colSums(assay(tse, "counts")), 0.02),
niter = 10,
BPPARAM = BiocParallel::MulticoreParam()
)4 Plot Alpha Diversity Metrics
4.1 Using wilcox.test
Although is not appropriate for longitudinal data, because there is no independence between samples, we can use it to get a first idea of the differences between treatments.
plt_list <-
c(metrics, "gene_richness") %>%
purrr::set_names() %>%
purrr::map( ~ {
# prepare data
plt_df <- colData(tse) %>% tibble::as_tibble()
# calculate stats
stats_2 <-
plt_df %>%
rstatix::group_by(treatment) %>%
rstatix::wilcox_test(
formula = formula(glue::glue("{.x} ~ time_point")),
p.adjust.method = "fdr"
) %>%
rstatix::add_significance() %>%
rstatix::add_xy_position(x = "time_point", group = "treatment") %>%
dplyr::mutate(y.position = y.position)
# plot
plt <-
plt_df %>%
tidyr::drop_na(!!dplyr::sym(.x)) %>%
tidyplots::tidyplot(x = time_point, y = !!dplyr::sym(.x), colour = treatment) %>%
tidyplots::add_boxplot(show_outliers = FALSE) %>%
tidyplots::add_data_points_jitter(alpha = 0.4) %>%
tidyplots::add_test_asterisks(
method = "wilcox_test", hide_info = TRUE, bracket.nudge.y = 0.05, tip.length = 0.01
) %>%
tidyplots::add(ggpubr::stat_pvalue_manual(
stats_2, label = "p.adj.signif", hide.ns = TRUE, tip.length = 0.01,
)) %>%
tidyplots::adjust_x_axis_title("Time Point (weeks)") %>%
tidyplots::adjust_legend_title("Treatment") %>%
tidyplots::adjust_y_axis_title(
.x %>% stringr::str_replace_all("_", " ") %>% stringr::str_to_title()
) %>%
tidyplots::adjust_colors(tidyplots::colors_discrete_friendly)
if (.x == "gene_richness") {
plt <- plt %>% tidyplots::adjust_y_axis(labels = scales::scientific)
}
# save
plt %>%
tidyplots::adjust_size(width = 40, height = 40, unit = "mm") %>%
tidyplots::save_plot(
here::here(out_dir, glue::glue("alpha_{.x}.pdf")),
view_plot = FALSE
)
plt
})4.2 Using Linear Mixed Models
plt_list <-
c(metrics, "gene_richness") %>%
purrr::set_names() %>%
purrr::map( ~ {
.metric <- rlang::sym(.x)
# prepare data
plt_df <-
colData(tse) %>%
tibble::as_tibble() %>%
dplyr::select(dplyr::all_of(c(confounders, .x))) %>%
dplyr::arrange(record_id, time_point) %>%
dplyr::group_by(record_id) %>%
tidyr::drop_na(dplyr::all_of(.metric)) %>%
dplyr::mutate(
dplyr::across(dplyr::where(is.character), as.factor),
treatment = factor(treatment, levels = c("DRV/r", "DTG")),
tp = as.character(time_point) %>% as.numeric(),
ratio = .data[[.x]] / .data[[.x]][tp == min(tp)][1],
log_ratio = log2(ratio)
) %>%
dplyr::ungroup()
# lmm
lmm_res <-
stats::as.formula(mod_formula) %>%
lmerTest::lmer(data = plt_df)
# emmeans longitudinal
emm <- lmm_res %>% emmeans::emmeans(~ time_point | treatment)
lon_emm_df <- emm %>% broom.mixed::tidy(conf.int = TRUE)
lon_contrast_df <-
emm %>%
emmeans::contrast(method = "revpairwise", adjust = "fdr") %>%
broom.mixed::tidy(conf.int = TRUE)
# emmeand by group
emm <- lmm_res %>% emmeans::emmeans(~ treatment | time_point)
group_emm_df <- emm %>% broom.mixed::tidy(conf.int = TRUE)
group_contrast_df <-
emm %>%
emmeans::contrast(method = "revpairwise", adjust = "fdr") %>%
broom.mixed::tidy(conf.int = TRUE) %>%
dplyr::mutate(p.adj.global = p.adjust(p.value, method = "fdr"))
# forest plot longitudinal
forest_plt_longitudinal <-
lon_contrast_df %>%
rstatix::add_significance(p.col = "adj.p.value") %>%
dplyr::mutate(
contrast_clean = stringr::str_remove_all(contrast, "time_point"),
t1 = as.numeric(stringr::str_extract(contrast_clean, "^[0-9]+")),
t2 = as.numeric(stringr::str_extract(contrast_clean, "(?<=- )[0-9]+")),
treatment = factor(treatment, levels = c("DRV/r", "DTG"))
) %>%
dplyr::arrange(t2, t1) %>%
dplyr::mutate(contrast = factor(contrast_clean, levels = unique(contrast_clean))) %>%
tidyplots::tidyplot(x = estimate, y = contrast, colour = treatment) %>%
tidyplots::add(geom_pointrange(
aes(xmin = conf.low, xmax = conf.high),
alpha = 0.8,
position = position_dodge(width = 0.6),
size = 2/ggplot2::.pt
)) %>%
tidyplots::add_reference_lines(x = 0) %>%
tidyplots::add(geom_text(
aes(
x = conf.high * 1.2,
label = dplyr::if_else(adj.p.value.signif != "ns", adj.p.value.signif, "")
),
position = position_dodge(width = 0.6),
size = 7,
hjust = 0,
colour = "black"
)) %>%
tidyplots::add(expand_limits(x = max(lon_contrast_df$conf.high, na.rm = TRUE) * 1.25)) %>%
tidyplots::adjust_x_axis_title("Δ log2(ratio) (later − earlier time point)") %>%
tidyplots::adjust_y_axis_title("Comparison of time points") %>%
tidyplots::adjust_legend_title("Treatment") %>%
tidyplots::remove_legend()
# forest plot by group
forest_plt_group <-
group_contrast_df %>%
rstatix::add_significance(p.col = "p.adj.global") %>%
tidyplots::tidyplot(x = estimate, y = time_point) %>%
tidyplots::add(geom_pointrange(
aes(xmin = conf.low, xmax = conf.high),
alpha = 0.8,
position = position_dodge(width = 0.6),
size = 2/ggplot2::.pt,
colour = "#252525"
)) %>%
tidyplots::add_reference_lines(x = 0) %>%
tidyplots::add(geom_text(
aes(
x = conf.high * 1.2,
label = dplyr::if_else(p.adj.global.signif != "ns", p.adj.global.signif, "")
),
position = position_dodge(width = 0.6),
size = 7,
hjust = 0,
colour = "black"
)) %>%
tidyplots::add(expand_limits(x = max(group_contrast_df$conf.high, na.rm = TRUE) * 1.25)) %>%
tidyplots::adjust_x_axis_title("Δ log2 ratio (DTG - DRV/r)") %>%
tidyplots::adjust_y_axis_title("Time point (weeks)")
# Trajectory plot
traj_plt <-
lon_emm_df %>%
dplyr::mutate(
time_point = as.numeric(as.character(time_point)),
treatment = factor(treatment, levels = c("DRV/r", "DTG"))
) %>%
tidyplots::tidyplot(x = time_point, y = estimate, colour = treatment, group = treatment) %>%
tidyplots::add(geom_pointrange(
aes(y = estimate, ymin = conf.low, ymax = conf.high),
alpha = 0.8,
position = position_dodge(width = 50/ggplot2::.pt),
size = 2 / ggplot2::.pt
)) %>%
tidyplots::add_line() %>%
tidyplots::add_reference_lines(y = 0) %>%
tidyplots::adjust_x_axis_title("Time Point (weeks)") %>%
tidyplots::adjust_y_axis_title("Estimated Change in Log2 Ratio") %>%
tidyplots::adjust_legend_title("Treatment")
# plt list
plt_list <- list(
traj_plt = traj_plt,
forest_plt_longitudinal = forest_plt_longitudinal,
forest_plt_group = forest_plt_group
)
# save
showtext::showtext_auto()
names(plt_list) %>%
purrr::walk(~ {
plt_list[[.x]] %>%
tidyplots::adjust_size(width = 40, height = 40, unit = "mm") %>%
tidyplots::save_plot(
here::here(out_dir, glue::glue("{.x}_{.metric}.pdf")),
view_plot = FALSE
)
})
f_plt <- wrap_plots(plt_list) + plot_layout(guides = "collect")
})Observed richness increased significantly over time in the DTG arm, while Shannon diversity showed a similar but non-significant trend. In line with this, dominance decreased over time in the DTG arm, whereas in the DRV/r arm dominance was significantly higher at week 96, indicating that communities remained skewed toward a few highly abundant taxa.
The increase in observed richness under DTG treatment suggests acquisition of additional taxa. Although Shannon diversity only showed a non-significant increasing trend, the concurrent decrease in dominance indicates that communities tended to become less skewed toward a few highly abundant species. In contrast, participants in the DRV/r arm exhibited significantly higher dominance at week 96, consistent with a community structure where a small number of taxa remained disproportionately abundant. These findings suggest that DTG may promote a more balanced microbial community structure over time, whereas DRV/r is associated with increasing dominance
Observed richness was consistently higher in the DTG arm at weeks 24, 48, and 96, with estimated differences of ~0.2–0.23 log2 ratio (≈15–18% increase) compared to DRV/r. While nominal p-values indicated significance at all three time points, these associations did not remain significant after global FDR adjustment (adjusted p ≈ 0.066).
5 Compute Correlations
5.1 Define Cytokines and Markers
populations <- c("CD4", "CD8", "CD4_CD38_DR", "CD8_CD38_DR")
cytokines <- c("CRP", "IL6", "TNFa", "sCD14")
others <- c("HIV_VL", "BMI")
all_markers <- c(populations, cytokines, others)
# Log2 Transformation
tse2 <- tse %>% dplyr::mutate(dplyr::across(dplyr::all_of(all_markers), ~ log2(.x + 1)))5.2 Compute Sperman Correlations
sparman_corr_df <-
mia::getCrossAssociation(
tse2,
tse2,
col.var1 = c(metrics, "gene_richness"),
col.var2 = all_markers,
method = "spearman",
p_adj_method = "fdr",
test.signif = TRUE,
verbose = FALSE,
show_warnings = FALSE
)
# prepare data
plt_df <-
sparman_corr_df %>%
dplyr::mutate(
sign = dplyr::case_when(
p_adj < 0.05 & abs(cor) > 0.2 ~ "< 0.05",
dplyr::between(p_adj, 0.05, 0.1) & abs(cor) > 0.2 ~ "< 0.1",
TRUE ~ "ns"
)
)
# plot
lim <- abs(plt_df$cor) %>% max() %>% round(digits = 1) * 1.05
plt <-
plt_df %>%
dplyr::mutate(lab = stringr::str_replace(Var1, "_", " ") %>% stringr::str_to_sentence()) %>%
ggplot(aes(x = Var2, y = lab, fill = cor, colour = sign)) +
geom_point(aes(size = abs(cor), alpha = sign), shape = 22, stroke = 1) +
scale_fill_gradientn(
colours = rev(RColorBrewer::brewer.pal(11, "PiYG")),
limits = c(-lim, lim)
) +
scale_colour_manual(values = c("< 0.05" = "black", "< 0.1" = "#525252", "ns" = "white")) +
scale_alpha_manual(values = c("< 0.05" = 0.9, "< 0.1" = 0.9, "ns" = 0.6), guide = "none") +
scale_size_continuous(range = c(2, 8), guide = "none") +
theme_minimal() +
labs(
x = "", y = "", fill = "Spearman Correlation", colour = "q < 0.05"
) +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "top",
legend.title.position = "top",
legend.title = element_text(hjust = 0.5),
legend.key.height = unit(0.5, "cm"),
)
# Resize and save
r_plt <-
plt + plot_spacer() +
plot_layout(
widths = ggplot2::unit(c(75, 1), "mm"),
heights = ggplot2::unit(40, "mm")
)
tidyplots::save_plot(
r_plt,
here::here(out_dir, glue::glue("all_sparman_correlation.pdf")),
view_plot = FALSE
)
r_plt5.3 Compute Repeated Measures Correlation Coefficient
# computes correlations
rmcorr_df_all <-
tidyr::expand_grid(metric = c(metrics, "gene_richness"), all_markers) %>%
purrr::pmap_dfr( ~ {
# prepare data
f_df <-
colData(tse2) %>%
tibble::as_tibble() %>%
dplyr::select(dplyr::all_of(c(.x, .y, confounders))) %>%
tidyr::drop_na(dplyr::all_of(c(.x, .y))) %>%
dplyr::mutate(
dplyr::across(dplyr::where(is.character), as.factor),
treatment = factor(treatment, levels = c("DRV/r", "DTG"))
)
# compute rmcorr
res <- rmcorr::rmcorr(
participant = "record_id",
measure1 = f_df[[.x]],
measure2 = f_df[[.y]],
dataset = f_df
)
# create tibble
tibble::tibble(
metric = .x,
marker = .y,
cor = res$r,
p = res$p,
df = list(f_df),
rmcorr = list(res),
)
}) %>%
dplyr::mutate(p_adj = p.adjust(p, method = "fdr"), .after = p) %>%
dplyr::arrange(p_adj)
# prepare data
plt_df <-
rmcorr_df_all %>%
dplyr::mutate(
sign = dplyr::case_when(
p_adj < 0.05 & abs(cor) > 0.2 ~ "< 0.05",
dplyr::between(p_adj, 0.05, 0.1) & abs(cor) > 0.2 ~ "< 0.1",
TRUE ~ "ns"
),
marker = factor(marker, levels = all_markers),
)
# plot
lim <- abs(plt_df$cor) %>% max() %>% round(digits = 1) * 1.05
plt <-
plt_df %>%
dplyr::mutate(lab = stringr::str_replace(metric, "_", " ") %>% stringr::str_to_sentence()) %>%
ggplot(aes(x = marker, y = lab, fill = cor, colour = sign)) +
geom_point(aes(size = abs(cor), alpha = sign), shape = 22, stroke = 1) +
scale_fill_gradientn(
colours = rev(RColorBrewer::brewer.pal(11, "PiYG")),
limits = c(-lim, lim)
) +
scale_colour_manual(values = c("< 0.05" = "black", "< 0.1" = "#525252", "ns" = "white")) +
scale_alpha_manual(values = c("< 0.05" = 0.9, "< 0.1" = 0.9, "ns" = 0.6), guide = "none") +
scale_size_continuous(range = c(2, 8), guide = "none") +
theme_minimal() +
labs(
x = "", y = "", fill = "Repeated Measures Correlation", colour = "q < 0.05"
) +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "top",
legend.title.position = "top",
legend.title = element_text(hjust = 0.5),
legend.key.height = unit(0.5, "cm"),
)
# Resize and save
r_plt <-
plt + plot_spacer() +
plot_layout(
widths = ggplot2::unit(c(75, 1), "mm"),
heights = ggplot2::unit(40, "mm")
)
tidyplots::save_plot(
r_plt,
here::here(out_dir, glue::glue("all_rm_correlation.pdf")),
view_plot = FALSE
)
r_plt6 Compute Correlations by Treatment Group
6.1 Compute Spearman Correlations
spearman_corr_df <-
mia::splitOn(tse2, "treatment") %>%
purrr::map_dfr(~ {
mia::getCrossAssociation(
.x,
.x,
col.var1 = c(metrics, "gene_richness"),
col.var2 = all_markers,
method = "spearman",
p_adj_method = "fdr",
test.signif = TRUE,
verbose = FALSE,
show_warnings = FALSE
) %>%
dplyr::mutate(treatment = unique(.x$treatment))
})
plt_list <-
c(metrics, "gene_richness") %>%
purrr::set_names() %>%
purrr::map(~ {
# prepare data
plt_df <-
spearman_corr_df %>%
dplyr::filter(Var1 == .x) %>%
dplyr::mutate(
sign = dplyr::case_when(
p_adj < 0.05 & abs(cor) > 0.2 ~ "< 0.05",
dplyr::between(p_adj, 0.05, 0.1) & abs(cor) > 0.2 ~ "< 0.1",
TRUE ~ "ns"
)
)
# plot
.name <- stringr::str_replace(.x, "_", " ") %>% stringr::str_to_title()
lim <- abs(plt_df$cor) %>% max() %>% round(digits = 1)
plt <-
plt_df %>%
ggplot(aes(x = Var2, y = treatment, fill = cor, colour = sign)) +
geom_point(aes(size = abs(cor), alpha = sign), shape = 22, stroke = 1) +
scale_fill_gradientn(
colours = rev(RColorBrewer::brewer.pal(11, "PiYG")),
limits = c(-lim, lim)
) +
scale_colour_manual(values = c("< 0.05" = "black", "< 0.1" = "#525252", "ns" = "white")) +
scale_alpha_manual(values = c("< 0.05" = 0.9, "< 0.1" = 0.9, "ns" = 0.6), guide = "none") +
scale_size_continuous(range = c(2, 8), guide = "none") +
theme_minimal() +
labs(
x = "",
y = "Treatment",
fill = glue::glue("Spearman Correlation\n(Mark. vs. {.name})"),
colour = "q < 0.05"
) +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "top",
legend.title.position = "top",
legend.title = element_text(hjust = 0.5),
legend.key.height = unit(0.5, "cm"),
)
# Resize and save
plt <-
plt + plot_spacer() +
plot_layout(
widths = ggplot2::unit(c(70, 1), "mm"),
heights = ggplot2::unit(30, "mm")
)
tidyplots::save_plot(
plt,
here::here(out_dir, glue::glue("spearman_correlation_{.x}.pdf")),
view_plot = FALSE
)
plt
})6.2 Compute Repeated Measures Correlation Coefficient
rmcorr_df <-
mia::splitOn(tse2, "treatment") %>%
purrr::map_dfr(function(.split) {
tidyr::expand_grid(metric = c(metrics, "gene_richness"), all_markers) %>%
purrr::pmap_dfr( ~ {
# prepare data
f_df <-
colData(.split) %>%
tibble::as_tibble() %>%
dplyr::select(dplyr::all_of(c(.x, .y, confounders))) %>%
tidyr::drop_na(dplyr::all_of(c(.x, .y))) %>%
dplyr::mutate(
dplyr::across(dplyr::where(is.character), as.factor),
treatment = factor(treatment, levels = c("DRV/r", "DTG"))
)
# compute rmcorr
res <- rmcorr::rmcorr(
participant = "record_id",
measure1 = f_df[[.x]],
measure2 = f_df[[.y]],
dataset = f_df
)
# create tibble
tibble::tibble(
metric = .x,
marker = .y,
treatment = unique(.split$treatment),
cor = res$r,
p = res$p,
df = list(f_df),
rmcorr = list(res),
)
})
}) %>%
dplyr::mutate(p_adj = p.adjust(p, method = "fdr"), .after = p) %>%
dplyr::arrange(p_adj)
plt_list <-
c(metrics, "gene_richness") %>%
purrr::set_names() %>%
purrr::map(~ {
# prepare data
plt_df <-
rmcorr_df %>%
dplyr::filter(metric == .x) %>%
dplyr::mutate(
sign = dplyr::case_when(
p_adj < 0.05 & abs(cor) > 0.2 ~ "< 0.05",
dplyr::between(p_adj, 0.05, 0.1) & abs(cor) > 0.2 ~ "< 0.1",
TRUE ~ "ns"
),
marker = factor(marker, levels = all_markers)
)
# plot
.name <- stringr::str_replace(.x, "_", " ") %>% stringr::str_to_title()
lim <- abs(plt_df$cor) %>% max() %>% round(digits = 1) * 1.2
plt <-
plt_df %>%
ggplot(aes(x = marker, y = treatment, fill = cor, colour = sign)) +
geom_point(aes(size = abs(cor), alpha = sign), shape = 22, stroke = 1) +
scale_fill_gradientn(
colours = rev(RColorBrewer::brewer.pal(11, "PiYG")),
limits = c(-lim, lim)
) +
scale_colour_manual(values = c("< 0.05" = "black", "< 0.1" = "#525252", "ns" = "white")) +
scale_alpha_manual(values = c("< 0.05" = 0.9, "< 0.1" = 0.9, "ns" = 0.6), guide = "none") +
scale_size_continuous(range = c(2, 8), guide = "none") +
theme_minimal() +
labs(
x = "",
y = "Treatment",
fill = glue::glue("Repeated Measures Correlation\n(Marker vs. {.name})"),
colour = "q < 0.05"
) +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "top",
legend.title.position = "top",
legend.title = element_text(hjust = 0.5),
legend.key.height = unit(0.5, "cm"),
)
# Resize and save
plt <-
plt + plot_spacer() +
plot_layout(
widths = ggplot2::unit(c(70, 1), "mm"),
heights = ggplot2::unit(30, "mm")
)
tidyplots::save_plot(
plt,
here::here(out_dir, glue::glue("rm_correlation_{.x}.pdf")),
view_plot = FALSE
)
plt
})7 Scatter Correlations
7.1 Spearman Correlations - Global
dir.create(here::here(out_dir, "spearman_scat"), showWarnings = FALSE, recursive = TRUE)
plt_df <- colData(tse2) %>% tibble::as_tibble()
plt_list <-
c(metrics, "gene_richness") %>%
purrr::set_names() %>%
purrr::map(function(.metric) {
all_markers %>%
purrr::set_names() %>%
purrr::map(function(.marker) {
# prepare data
marker_values <- plt_df[[.marker]] %>% .[is.finite(.)]
lab_min <- min(marker_values, na.rm = TRUE)
lab_max <- max(marker_values, na.rm = TRUE)
lab_range <- lab_max - lab_min
.name <- stringr::str_replace(.metric, "_", " ") %>% stringr::str_to_title()
.y_lab <- glue::glue("Log2 {.marker}")
plt <-
plt_df %>%
tidyr::drop_na(!!.metric, !!.marker) %>%
tidyplots::tidyplot(
x = !!dplyr::sym(.metric),
y = !!dplyr::sym(.marker),
) %>%
tidyplots::add_data_points(alpha = 0.5) %>%
tidyplots::add(geom_smooth(method = "lm", alpha = 0.1, formula = 'y ~ x')) %>%
tidyplots::add(ggpubr::stat_cor(
method = "spearman",
cor.coef.name = "rho",
label.y.npc = "bottom",
p.digits = 1,
label.y = c(lab_min - 0.05 * lab_range, lab_min - 0.15 * lab_range),
size = 3
)) %>%
tidyplots::adjust_legend_title("Treatment") %>%
tidyplots::adjust_y_axis_title(.y_lab) %>%
tidyplots::adjust_x_axis_title(.name) %>%
tidyplots::adjust_x_axis(rotate_labels = 30) %>%
tidyplots::adjust_colors(tidyplots::colors_discrete_friendly)
if (.metric == "gene_richness") {
plt <- plt %>% tidyplots::adjust_x_axis(labels = scales::scientific)
}
# save
plt %>%
tidyplots::adjust_size(width = 50, height = 40, unit = "mm") %>%
tidyplots::save_plot(
here::here(
out_dir,
glue::glue("spearman_scat/global_corr_{.metric}_{.marker}.pdf")
),
view_plot = FALSE
)
plt
})
})7.2 Spearman Correlations - By Treatment
plt_df <- colData(tse2) %>% tibble::as_tibble()
plt_list <-
c(metrics, "gene_richness") %>%
purrr::set_names() %>%
purrr::map(function(.metric) {
all_markers %>%
purrr::set_names() %>%
purrr::map(function(.marker) {
# prepare data
marker_values <- plt_df[[.marker]] %>% .[is.finite(.)]
lab_min <- min(marker_values, na.rm = TRUE)
lab_max <- max(marker_values, na.rm = TRUE)
lab_range <- lab_max - lab_min
.name <- stringr::str_replace(.metric, "_", " ") %>% stringr::str_to_title()
.y_lab <- glue::glue("Log2 {.marker}")
plt <-
plt_df %>%
tidyr::drop_na(!!.metric, !!.marker) %>%
tidyplots::tidyplot(
x = !!dplyr::sym(.metric),
y = !!dplyr::sym(.marker),
colour = treatment
) %>%
tidyplots::add_data_points(alpha = 0.5) %>%
tidyplots::add(geom_smooth(method = "lm", alpha = 0.1, formula = 'y ~ x')) %>%
tidyplots::add(ggpubr::stat_cor(
method = "spearman",
cor.coef.name = "rho",
label.y.npc = "bottom",
p.digits = 1,
label.y = c(lab_min - 0.05 * lab_range, lab_min - 0.15 * lab_range),
size = 3
)) %>%
tidyplots::adjust_legend_title("Treatment") %>%
tidyplots::adjust_y_axis_title(.y_lab) %>%
tidyplots::adjust_x_axis_title(.name) %>%
tidyplots::adjust_x_axis(rotate_labels = 30) %>%
tidyplots::adjust_colors(tidyplots::colors_discrete_friendly)
if (.metric == "gene_richness") {
plt <- plt %>% tidyplots::adjust_x_axis(labels = scales::scientific)
}
# save
plt %>%
tidyplots::adjust_size(width = 50, height = 40, unit = "mm") %>%
tidyplots::save_plot(
here::here(
out_dir,
glue::glue("spearman_scat/corr_{.metric}_{.marker}.pdf")
),
view_plot = FALSE
)
plt
})
})7.3 Repeated Measures Correlation Coefficient - Global
dir.create(here::here(out_dir, "rm_scat"), showWarnings = FALSE, recursive = TRUE)
plt_list <-
c(metrics, "gene_richness") %>%
purrr::set_names() %>%
purrr::map(function(.metric) {
all_markers %>%
purrr::set_names() %>%
purrr::map(function(.marker) {
it_data <- rmcorr_df_all %>% dplyr::filter(metric == .metric, marker == .marker)
plt_df <-
it_data$df %>%
purrr::list_rbind() %>%
dplyr::mutate(fitted_vals = it_data$rmcorr[[1]]$model$fitted.values)
.y_lab <- glue::glue("Log2 {.marker}")
plt <-
plt_df %>%
tidyplots::tidyplot(x = !!rlang::sym(.metric), y = !!rlang::sym(.marker), color = record_id) %>%
tidyplots::add_data_points(alpha = 0.4) %>%
tidyplots::add(geom_line(aes(y = fitted_vals, group = record_id), alpha = 0.8)) %>%
tidyplots::adjust_x_axis_title(
.metric %>% stringr::str_replace_all("_", " ") %>% stringr::str_to_title()
) %>%
tidyplots::adjust_caption(fontsize = 7) %>%
tidyplots::adjust_y_axis_title(.y_lab) %>%
tidyplots::adjust_x_axis(rotate_labels = 30) %>%
tidyplots::adjust_colors(tidyplots::colors_discrete_friendly) %>%
tidyplots::remove_legend() %>%
tidyplots::add_caption(
caption = glue::glue(
"r: {round(it_data$cor, 2)}; ajd. P: {formatC(it_data$p_adj,
format = 'e', digits = 2)}; n = {nrow(plt_df)}"
)
)
if (.metric == "gene_richness") {
plt <- plt %>% tidyplots::adjust_x_axis(labels = scales::scientific)
}
# save
plt %>%
tidyplots::adjust_size(width = 50, height = 40, unit = "mm") %>%
tidyplots::save_plot(
here::here(
out_dir,
glue::glue("rm_scat/global_corr_{.metric}_{.marker}.pdf")
),
view_plot = FALSE
)
plt
})
})7.4 Repeated Measures Correlation Coefficient - by Treatment
dir.create(here::here(out_dir, "rm_scat"), showWarnings = FALSE, recursive = TRUE)
plt_list <-
c(metrics, "gene_richness") %>%
purrr::set_names() %>%
purrr::map(function(.metric) {
all_markers %>%
purrr::set_names() %>%
purrr::map(function(.marker) {
it_data <- rmcorr_df %>% dplyr::filter(metric == .metric, marker == .marker)
plt_df <-
it_data$df %>%
purrr::list_rbind() %>%
dplyr::mutate(
fitted_vals = c(
it_data$rmcorr[[1]]$model$fitted.values,
it_data$rmcorr[[2]]$model$fitted.values
)
)
.y_lab <- glue::glue("Log2 {.marker}")
plt <-
plt_df %>%
tidyplots::tidyplot(x = !!rlang::sym(.metric), y = !!rlang::sym(.marker), color = treatment) %>%
tidyplots::add_data_points(alpha = 0.4) %>%
tidyplots::add(geom_line(aes(y = fitted_vals, group = record_id), alpha = 0.8)) %>%
tidyplots::adjust_x_axis_title(
.metric %>% stringr::str_replace_all("_", " ") %>% stringr::str_to_title()
) %>%
tidyplots::adjust_caption(fontsize = 7) %>%
tidyplots::adjust_y_axis_title(.y_lab) %>%
tidyplots::adjust_x_axis(rotate_labels = 30) %>%
tidyplots::adjust_colors(tidyplots::colors_discrete_friendly) %>%
tidyplots::remove_legend() %>%
tidyplots::add_caption(
caption = glue::glue(
"r: {round(it_data$cor, 2)}; ajd. P: {formatC(it_data$p_adj,
format = 'e', digits = 2)}; n = {nrow(plt_df)}"
)
)
if (.metric == "gene_richness") {
plt <- plt %>% tidyplots::adjust_x_axis(labels = scales::scientific)
}
# save
plt %>%
tidyplots::adjust_size(width = 50, height = 40, unit = "mm") %>%
tidyplots::save_plot(
here::here(
out_dir,
glue::glue("rm_scat/corr_{.metric}_{.marker}.pdf")
),
view_plot = FALSE
)
plt
})
})8 Export TSE with Alpha Metrics
tse %>% readr::write_rds(here::here("data", "processed", "tse_alpha.rds"))9 Appendix
9.1 Session Info
devtools::session_info()─ Session info ───────────────────────────────────────────────────────────────
setting value
version R version 4.5.1 Patched (2025-07-06 r88390)
os Ubuntu 24.04.2 LTS
system x86_64, linux-gnu
ui X11
language (EN)
collate en_US.UTF-8
ctype en_US.UTF-8
tz Europe/Madrid
date 2025-08-22
pandoc 3.4 @ /usr/lib/rstudio/resources/app/bin/quarto/bin/tools/x86_64/ (via rmarkdown)
quarto 1.6.42 @ /usr/lib/rstudio/resources/app/bin/quarto/bin/quarto
─ Packages ───────────────────────────────────────────────────────────────────
package * version date (UTC) lib source
abind 1.4-8 2024-09-12 [1] RSPM (R 4.5.0)
ape 5.8-1 2024-12-16 [1] RSPM (R 4.5.0)
backports 1.5.0 2024-05-23 [1] RSPM (R 4.5.0)
beachmat 2.24.0 2025-04-15 [1] Bioconductor 3.21 (R 4.5.0)
beeswarm 0.4.0 2021-06-01 [1] RSPM (R 4.5.0)
Biobase * 2.68.0 2025-04-15 [1] Bioconductor 3.21 (R 4.5.0)
BiocBaseUtils 1.10.0 2025-04-15 [1] Bioconductor 3.21 (R 4.5.0)
BiocGenerics * 0.54.0 2025-04-15 [1] Bioconductor 3.21 (R 4.5.0)
BiocNeighbors 2.2.0 2025-04-15 [1] Bioconductor 3.21 (R 4.5.0)
BiocParallel 1.42.1 2025-06-01 [1] Bioconductor 3.21 (R 4.5.0)
BiocSingular 1.24.0 2025-04-15 [1] Bioconductor 3.21 (R 4.5.0)
Biostrings 2.76.0 2025-04-15 [1] Bioconductor 3.21 (R 4.5.0)
bluster 1.18.0 2025-04-15 [1] Bioconductor 3.21 (R 4.5.0)
boot 1.3-31 2024-08-28 [1] RSPM (R 4.5.0)
broom 1.0.8 2025-03-28 [1] RSPM (R 4.5.0)
broom.mixed 0.2.9.6 2024-10-15 [1] RSPM (R 4.5.0)
cachem 1.1.0 2024-05-16 [1] RSPM (R 4.5.0)
car 3.1-3 2024-09-27 [1] RSPM (R 4.5.0)
carData 3.0-5 2022-01-06 [1] RSPM (R 4.5.0)
cellranger 1.1.0 2016-07-27 [1] RSPM (R 4.5.0)
cli 3.6.5 2025-04-23 [1] RSPM (R 4.5.0)
cluster 2.1.8.1 2025-03-12 [1] RSPM (R 4.5.0)
coda 0.19-4.1 2024-01-31 [1] RSPM (R 4.5.0)
codetools 0.2-20 2024-03-31 [1] RSPM (R 4.5.0)
crayon 1.5.3 2024-06-20 [1] RSPM (R 4.5.0)
data.table 1.17.8 2025-07-10 [1] RSPM (R 4.5.0)
DBI 1.2.3 2024-06-02 [1] RSPM (R 4.5.0)
DECIPHER 3.4.0 2025-04-15 [1] Bioconductor 3.21 (R 4.5.0)
decontam 1.28.0 2025-04-15 [1] Bioconductor 3.21 (R 4.5.0)
DelayedArray 0.34.1 2025-04-17 [1] Bioconductor 3.21 (R 4.5.0)
DelayedMatrixStats 1.30.0 2025-04-15 [1] Bioconductor 3.21 (R 4.5.0)
devtools 2.4.5 2022-10-11 [1] RSPM (R 4.5.0)
digest 0.6.37 2024-08-19 [1] RSPM (R 4.5.0)
DirichletMultinomial 1.50.0 2025-04-15 [1] Bioconductor 3.21 (R 4.5.0)
dplyr * 1.1.4 2023-11-17 [1] RSPM (R 4.5.0)
ellipsis 0.3.2 2021-04-29 [1] RSPM (R 4.5.0)
emmeans 1.11.2 2025-07-11 [1] RSPM (R 4.5.0)
estimability 1.5.1 2024-05-12 [1] RSPM (R 4.5.0)
evaluate 1.0.4 2025-06-18 [1] RSPM (R 4.5.0)
fansi 1.0.6 2023-12-08 [1] RSPM (R 4.5.0)
farver 2.1.2 2024-05-13 [1] RSPM (R 4.5.0)
fastmap 1.2.0 2024-05-15 [1] RSPM (R 4.5.0)
fillpattern 1.0.2 2024-06-24 [1] RSPM (R 4.5.0)
forcats 1.0.0 2023-01-29 [1] RSPM (R 4.5.0)
Formula 1.2-5 2023-02-24 [1] RSPM (R 4.5.0)
fs 1.6.6 2025-04-12 [1] RSPM (R 4.5.0)
furrr 0.3.1 2022-08-15 [1] RSPM (R 4.5.0)
future 1.58.0 2025-06-05 [1] RSPM (R 4.5.0)
generics * 0.1.4 2025-05-09 [1] RSPM (R 4.5.0)
GenomeInfoDb * 1.44.1 2025-07-23 [1] Bioconductor 3.21 (R 4.5.1)
GenomeInfoDbData 1.2.14 2025-03-27 [1] Bioconductor
GenomicRanges * 1.60.0 2025-04-15 [1] Bioconductor 3.21 (R 4.5.0)
ggbeeswarm 0.7.2 2023-04-29 [1] RSPM (R 4.5.0)
ggnewscale 0.5.2 2025-06-20 [1] RSPM (R 4.5.0)
ggplot2 * 3.5.2 2025-04-09 [1] RSPM (R 4.5.0)
ggpubr 0.6.1 2025-06-27 [1] RSPM (R 4.5.0)
ggrepel 0.9.6 2024-09-07 [1] RSPM (R 4.5.0)
ggsignif 0.6.4 2022-10-13 [1] RSPM (R 4.5.0)
ggtext 0.1.2 2022-09-16 [1] RSPM (R 4.5.0)
globals 0.18.0 2025-05-08 [1] RSPM (R 4.5.0)
glue 1.8.0 2024-09-30 [1] RSPM (R 4.5.0)
gridExtra 2.3 2017-09-09 [1] RSPM (R 4.5.0)
gridtext 0.1.5 2022-09-16 [1] RSPM (R 4.5.0)
gtable 0.3.6 2024-10-25 [1] RSPM (R 4.5.0)
here 1.0.1 2020-12-13 [1] RSPM (R 4.5.0)
hms 1.1.3 2023-03-21 [1] RSPM (R 4.5.0)
htmltools 0.5.8.1 2024-04-04 [1] RSPM (R 4.5.0)
htmlwidgets 1.6.4 2023-12-06 [1] RSPM (R 4.5.0)
httpuv 1.6.16 2025-04-16 [1] RSPM (R 4.5.0)
httr 1.4.7 2023-08-15 [1] RSPM (R 4.5.0)
igraph 2.1.4 2025-01-23 [1] RSPM (R 4.5.0)
IRanges * 2.42.0 2025-04-15 [1] Bioconductor 3.21 (R 4.5.0)
irlba 2.3.5.1 2022-10-03 [1] RSPM (R 4.5.0)
jsonlite 2.0.0 2025-03-27 [1] RSPM (R 4.5.0)
knitr 1.50 2025-03-16 [1] RSPM (R 4.5.0)
labeling 0.4.3 2023-08-29 [1] RSPM (R 4.5.0)
later 1.4.2 2025-04-08 [1] RSPM (R 4.5.0)
lattice 0.22-7 2025-04-02 [1] RSPM (R 4.5.0)
lazyeval 0.2.2 2019-03-15 [1] RSPM (R 4.5.0)
lifecycle 1.0.4 2023-11-07 [1] RSPM (R 4.5.0)
listenv 0.9.1 2024-01-29 [1] RSPM (R 4.5.0)
lme4 1.1-37 2025-03-26 [1] RSPM (R 4.5.0)
lmerTest 3.1-3 2020-10-23 [1] RSPM (R 4.5.0)
magrittr * 2.0.3 2022-03-30 [1] RSPM (R 4.5.0)
MASS 7.3-65 2025-02-28 [1] RSPM (R 4.5.0)
Matrix 1.7-3 2025-03-11 [1] RSPM (R 4.5.0)
MatrixGenerics * 1.20.0 2025-04-15 [1] Bioconductor 3.21 (R 4.5.0)
matrixStats * 1.5.0 2025-01-07 [1] RSPM (R 4.5.0)
memoise 2.0.1 2021-11-26 [1] RSPM (R 4.5.0)
mgcv 1.9-3 2025-04-04 [1] RSPM (R 4.5.0)
mia 1.16.1 2025-07-13 [1] Bioconductor 3.21 (R 4.5.1)
mime 0.13 2025-03-17 [1] RSPM (R 4.5.0)
miniUI 0.1.2 2025-04-17 [1] RSPM (R 4.5.0)
minqa 1.2.8 2024-08-17 [1] RSPM (R 4.5.0)
mnormt 2.1.1 2022-09-26 [1] RSPM (R 4.5.0)
multcomp 1.4-28 2025-01-29 [1] RSPM (R 4.5.0)
MultiAssayExperiment 1.34.0 2025-04-15 [1] Bioconductor 3.21 (R 4.5.0)
mvtnorm 1.3-3 2025-01-10 [1] RSPM (R 4.5.0)
nlme 3.1-168 2025-03-31 [1] RSPM (R 4.5.0)
nloptr 2.2.1 2025-03-17 [1] RSPM (R 4.5.0)
numDeriv 2016.8-1.1 2019-06-06 [1] RSPM (R 4.5.0)
parallelly 1.45.1 2025-07-24 [1] RSPM (R 4.5.0)
patchwork * 1.3.1 2025-06-21 [1] RSPM (R 4.5.0)
pbkrtest 0.5.5 2025-07-18 [1] RSPM (R 4.5.0)
permute 0.9-8 2025-06-25 [1] RSPM (R 4.5.0)
pillar 1.11.0 2025-07-04 [1] RSPM (R 4.5.0)
pkgbuild 1.4.8 2025-05-26 [1] RSPM (R 4.5.0)
pkgconfig 2.0.3 2019-09-22 [1] RSPM (R 4.5.0)
pkgload 1.4.0 2024-06-28 [1] RSPM (R 4.5.0)
plotly 4.11.0 2025-06-19 [1] RSPM (R 4.5.0)
plyr 1.8.9 2023-10-02 [1] RSPM (R 4.5.0)
profvis 0.4.0 2024-09-20 [1] RSPM (R 4.5.0)
promises 1.3.3 2025-05-29 [1] RSPM (R 4.5.0)
psych 2.5.6 2025-06-23 [1] RSPM (R 4.5.0)
purrr 1.1.0 2025-07-10 [1] RSPM (R 4.5.0)
R6 2.6.1 2025-02-15 [1] RSPM (R 4.5.0)
ragg 1.4.0 2025-04-10 [1] RSPM (R 4.5.0)
rbibutils 2.3 2024-10-04 [1] RSPM (R 4.5.0)
rbiom 2.2.1 2025-06-27 [1] RSPM (R 4.5.0)
RColorBrewer 1.1-3 2022-04-03 [1] RSPM (R 4.5.0)
Rcpp 1.1.0 2025-07-02 [1] RSPM (R 4.5.0)
Rdpack 2.6.4 2025-04-09 [1] RSPM (R 4.5.0)
readr 2.1.5 2024-01-10 [1] RSPM (R 4.5.0)
readxl 1.4.5 2025-03-07 [1] RSPM (R 4.5.0)
reformulas 0.4.1 2025-04-30 [1] RSPM (R 4.5.0)
remotes 2.5.0 2024-03-17 [1] RSPM (R 4.5.0)
reshape2 1.4.4 2020-04-09 [1] RSPM (R 4.5.0)
rlang 1.1.6 2025-04-11 [1] RSPM (R 4.5.0)
rmarkdown 2.29 2024-11-04 [1] RSPM (R 4.5.0)
rmcorr 0.7.0 2024-07-26 [1] RSPM
rprojroot 2.1.0 2025-07-12 [1] RSPM (R 4.5.0)
rstatix 0.7.2 2023-02-01 [1] RSPM (R 4.5.0)
rstudioapi 0.17.1 2024-10-22 [1] RSPM (R 4.5.0)
rsvd 1.0.5 2021-04-16 [1] RSPM (R 4.5.0)
S4Arrays 1.8.1 2025-06-01 [1] Bioconductor 3.21 (R 4.5.0)
S4Vectors * 0.46.0 2025-04-15 [1] Bioconductor 3.21 (R 4.5.0)
sandwich 3.1-1 2024-09-15 [1] RSPM (R 4.5.0)
ScaledMatrix 1.16.0 2025-04-15 [1] Bioconductor 3.21 (R 4.5.0)
scales 1.4.0 2025-04-24 [1] RSPM (R 4.5.0)
scater 1.36.0 2025-04-15 [1] Bioconductor 3.21 (R 4.5.0)
scuttle 1.18.0 2025-04-15 [1] Bioconductor 3.21 (R 4.5.0)
sessioninfo 1.2.3 2025-02-05 [1] RSPM (R 4.5.0)
shiny 1.11.1 2025-07-03 [1] RSPM (R 4.5.0)
showtext 0.9-7 2024-03-02 [1] RSPM (R 4.5.0)
showtextdb 3.0 2020-06-04 [1] RSPM (R 4.5.0)
SingleCellExperiment * 1.30.1 2025-05-07 [1] Bioconductor 3.21 (R 4.5.0)
slam 0.1-55 2024-11-13 [1] RSPM (R 4.5.0)
SparseArray 1.8.1 2025-07-23 [1] Bioconductor 3.21 (R 4.5.1)
sparseMatrixStats 1.20.0 2025-04-15 [1] Bioconductor 3.21 (R 4.5.0)
stringi 1.8.7 2025-03-27 [1] RSPM (R 4.5.0)
stringr 1.5.1 2023-11-14 [1] RSPM (R 4.5.0)
SummarizedExperiment * 1.38.1 2025-04-30 [1] Bioconductor 3.21 (R 4.5.0)
survival 3.8-3 2024-12-17 [1] RSPM (R 4.5.0)
sysfonts 0.8.9 2024-03-02 [1] RSPM (R 4.5.0)
systemfonts 1.2.3 2025-04-30 [1] RSPM (R 4.5.0)
textshaping 1.0.1 2025-05-01 [1] RSPM (R 4.5.0)
TH.data 1.1-3 2025-01-17 [1] RSPM (R 4.5.0)
tibble 3.3.0 2025-06-08 [1] RSPM (R 4.5.0)
tidyplots * 0.3.1 2025-07-02 [1] RSPM (R 4.5.0)
tidyr * 1.3.1 2024-01-24 [1] RSPM (R 4.5.0)
tidyselect 1.2.1 2024-03-11 [1] RSPM (R 4.5.0)
tidySingleCellExperiment * 1.18.1 2025-05-11 [1] Bioconductor 3.21 (R 4.5.0)
tidytree 0.4.6 2023-12-12 [1] RSPM (R 4.5.0)
treeio 1.32.0 2025-04-15 [1] Bioconductor 3.21 (R 4.5.0)
TreeSummarizedExperiment 2.16.1 2025-05-11 [1] Bioconductor 3.21 (R 4.5.0)
ttservice * 0.5.3 2025-07-10 [1] RSPM (R 4.5.0)
tzdb 0.5.0 2025-03-15 [1] RSPM (R 4.5.0)
UCSC.utils 1.4.0 2025-04-15 [1] Bioconductor 3.21 (R 4.5.0)
urlchecker 1.0.1 2021-11-30 [1] RSPM (R 4.5.0)
usethis 3.1.0 2024-11-26 [1] RSPM (R 4.5.0)
vctrs 0.6.5 2023-12-01 [1] RSPM (R 4.5.0)
vegan 2.7-1 2025-06-05 [1] RSPM (R 4.5.0)
vipor 0.4.7 2023-12-18 [1] RSPM (R 4.5.0)
viridis 0.6.5 2024-01-29 [1] RSPM (R 4.5.0)
viridisLite 0.4.2 2023-05-02 [1] RSPM (R 4.5.0)
withr 3.0.2 2024-10-28 [1] RSPM (R 4.5.0)
xfun 0.52 2025-04-02 [1] RSPM (R 4.5.0)
xml2 1.3.8 2025-03-14 [1] RSPM (R 4.5.0)
xtable 1.8-4 2019-04-21 [1] RSPM (R 4.5.0)
XVector 0.48.0 2025-04-15 [1] Bioconductor 3.21 (R 4.5.0)
yaml 2.3.10 2024-07-26 [1] RSPM (R 4.5.0)
yulab.utils 0.2.0 2025-01-29 [1] RSPM (R 4.5.0)
zoo 1.8-14 2025-04-10 [1] RSPM (R 4.5.0)
[1] /home/fcatala/R/x86_64-pc-linux-gnu-library/4.5
[2] /opt/R/next/lib/R/library
* ── Packages attached to the search path.
──────────────────────────────────────────────────────────────────────────────
9.2 Contact
- Analysis Lead: Francesc Català-Moll
- Email: fcatala@irsicaixa.es
- Institution: GEM - IrsiCaixa